import os
import sodapy
from sodapy import Socrata
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import contextily as ctx
%matplotlib inline
# Lift the pandas display truncation for both rows and columns
# (a value of None means "no limit").
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)
# Bare notebook expression: evaluates to the installed geopandas version.
gpd.__version__
# Load the Buffalo 311 service-request dataset through the SODA API.
# Alternative (plain URL) method, kept for reference:
#   url = 'https://data.buffalony.gov/resource/whkc-e5vr.geojson?$limit=1000000'
#   buffalo_311 = gpd.read_file(url)
socrata_domain = 'data.buffalony.gov'
socrata_dataset_identifier = 'whkc-e5vr'
# The app token is read from the environment; it is None when the variable is unset.
app_token = os.environ.get('SODAPY_APPTOKEN')
client = Socrata(socrata_domain, app_token)
# Request GeoJSON and cap the row count with the dedicated `limit` keyword
# rather than appending a query string to `content_type`. The result is kept
# under a descriptive name so the builtin `dict` is not shadowed.
service_requests_geojson = client.get(
    socrata_dataset_identifier,
    content_type='geojson',
    limit=1000000,
    select='latitude,longitude,address_number,address_line_1,type',
)
# Turn the returned GeoJSON features into a GeoDataFrame.
buffalo_311 = gpd.GeoDataFrame.from_features(service_requests_geojson)
# Rebuild the geometry column as points from the longitude/latitude columns
# (cast to float) and tag the frame with the EPSG:4326 coordinate system.
buffalo_311 = gpd.GeoDataFrame(
    buffalo_311,
    geometry=gpd.points_from_xy(
        buffalo_311['longitude'].astype(float),
        buffalo_311['latitude'].astype(float),
    ),
    crs='epsg:4326',
)
buffalo_311.shape
# Discard every row that has a missing value in any column.
buffalo_311 = buffalo_311.dropna(axis=0, how='any')
buffalo_311.shape
# Drop rows whose geometry is null and report how much location data was lost.
nrows = buffalo_311.shape[0]
buffalo_311 = buffalo_311.loc[buffalo_311.geometry.notnull()]
krows = buffalo_311.shape[0]
removed = nrows - krows
# Guard the ratio: if the earlier dropna left no rows at all, dividing by
# nrows would raise ZeroDivisionError.
pctremoved = (removed / nrows) * 100 if nrows else 0.0
print(
    "Original number of rows = {}"
    ", Number of rows missing coordinates = {}"
    ", Percent missing data = {:.1f}%".format(nrows, removed, pctremoved)
)
if pctremoved > 10:
    print("WARNING: Percent missing location data exceeds recommended limit!")
# Reproject the points from EPSG:4326 to EPSG:3857.
buffalo_311 = buffalo_311.to_crs('epsg:3857')
# Split each street name once, at its first space; column 0 of the expanded
# result holds the leading word of the street name.
new_list = buffalo_311['address_line_1'].str.split(' ', n=1, expand=True)
buffalo_311['name'] = new_list[0]
# Build the join key 'address' as "<street number> <first word of street name>".
buffalo_311['address'] = buffalo_311['address_number'].str.cat(
    buffalo_311['name'], sep=' '
)
buffalo_311.head()
# Download the buffalo-clean dataset as CSV.
buffalo_clean = pd.read_csv(
    'https://drive.google.com/uc?id=1jIBr3W0p28VJj0TWJGtqKjxu-ejyenw0',
    low_memory=False,
)
# Split the address at its first two spaces; columns 0 and 1 are the first
# two tokens of the address.
new_list = buffalo_clean['address'].str.split(' ', n=2, expand=True)
# Overwrite 'address' with just those two tokens so it has the same
# "<token> <token>" shape as the key built on the 311 data.
buffalo_clean['address'] = new_list[0].add(' ').add(new_list[1])
# Join the 311 requests to the buffalo-clean records that share an 'address' key.
clean_311 = pd.merge(buffalo_311, buffalo_clean, on='address')
clean_311.shape
# Retain only the rows whose 'vacant_' column equals 'Y'.
is_vacant = clean_311['vacant_'].eq('Y')
clean_311_y = clean_311[is_vacant]
clean_311_y.shape
# Narrow the frame to the columns used by the maps further down.
relevant_columns = ['address', 'type', 'geometry', 'latitude', 'longitude']
clean_311_y = clean_311_y[relevant_columns]
clean_311_y.head()
# Load the neighborhood dataset (used as the polygon layer) as GeoJSON and
# reproject it to EPSG:3857.
url = 'https://data.buffalony.gov/resource/pg8k-g5iz.geojson?$limit=1000000'
hoods = gpd.read_file(url)
hoods = hoods.to_crs('epsg:3857')
# Drop rows whose geometry is null and report how much location data was lost.
nrows = hoods.shape[0]
hoods = hoods.loc[hoods.geometry.notnull()]
krows = hoods.shape[0]
removed = nrows - krows
# Guard the ratio: an empty download would otherwise raise ZeroDivisionError.
pctremoved = (removed / nrows) * 100 if nrows else 0.0
print(
    "Original number of rows = {}"
    ", Number of rows missing coordinates = {}"
    ", Percent missing data = {:.1f}%".format(nrows, removed, pctremoved)
)
if pctremoved > 10:
    print("WARNING: Percent missing location data exceeds recommended limit!")
# Static map: neighborhood polygons with the vacant-property 311 requests
# (addresses common to buffalo-clean and the 311 data) drawn on top.
hoods_poly = hoods.plot(
    alpha=0.5,
    figsize=(10, 10),
    edgecolor='black',
    color='dodgerblue',
    linewidth=2,
)
# Newer provider catalogs no longer ship the Stamen entries, so attribute
# access raises AttributeError there. Keep Stamen TonerLite where it is still
# available and otherwise fall back to a comparable light basemap.
try:
    basemap_source = ctx.providers.Stamen.TonerLite
except AttributeError:
    basemap_source = ctx.providers.CartoDB.Positron
ctx.add_basemap(hoods_poly, source=basemap_source)
clean_311_y.plot(
    ax=hoods_poly,
    marker='o',
    color='red',
    markersize=50,
    alpha=1,
    edgecolor='black',
    linewidth=1,
)
plt.title('Buffalo Vacant Homes', fontsize=20)
# Trailing semicolon suppresses the echoed return value in a notebook cell.
plt.axis('off');
# Interactive Bokeh map of the addresses common to the vacant buffalo-clean
# records and the Buffalo 311 service requests.
from bokeh.io import output_notebook, show, output_file, save
from bokeh.models import (HoverTool, GeoJSONDataSource, LogColorMapper, ColorBar)
from bokeh.plotting import figure
from bokeh.tile_providers import CARTODBPOSITRON, get_provider
from bokeh.transform import linear_cmap, log_cmap
import bokeh.palettes

# Background tile layer.
tileProvider = get_provider('CARTODBPOSITRON_RETINA')
output_notebook()

# Borderless, untitled 800x700 figure with the standard navigation tools.
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
f = figure(
    title='',
    tools=TOOLS,
    plot_width=800,
    plot_height=700,
    outline_line_color=None,
    min_border=0,
    min_border_left=0,
    min_border_right=0,
    min_border_top=0,
    min_border_bottom=0,
)
f.add_tile(tileProvider)
f.title.text_font_style = 'italic'
f.title.text_font_size = '14pt'
f.axis.visible = False

# Both layers are converted to EPSG:3857 and serialized to GeoJSON for Bokeh.
point = clean_311_y.to_crs('epsg:3857')
poly = hoods.to_crs('epsg:3857')
point_source = GeoJSONDataSource(geojson=point.to_json())
poly_source = GeoJSONDataSource(geojson=poly.to_json())

# Polygons are added first so the point markers are drawn on top of them.
areas = f.patches(
    'xs', 'ys',
    source=poly_source,
    fill_color='dodgerblue',
    fill_alpha=0.5,
    line_color='black',
    line_width=0.5,
)
circles = f.circle(
    'x', 'y',
    size=10,
    fill_color='red',
    line_color='black',
    fill_alpha=1,
    source=point_source,
)

# The hover tooltip is attached to the point markers only.
c_hover = HoverTool(
    renderers=[circles],
    point_policy='follow_mouse',
    tooltips=[('Type:', '@type'), ('Address', '@address')],
)
f.add_tools(c_hover)

# Register the HTML output file, then display the figure.
output_file('clean_311_y.html', title='clean_311_y')
show(f)